by @tozCSS
In [56]:
import pandas as pd
import plotly.plotly as py
import plotly.tools as tls
from plotly.graph_objs import *
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's 'ggplot' style sheet to the figures drawn below.
plt.style.use('ggplot')
In [5]:
cd ../
In [23]:
# Load the headline data: publication date, title and twcount
# (presumably the number of tweets commenting on the headline -- TODO confirm).
df = pd.read_csv(
    'data/US-headlines.csv',
    usecols=['newsdate', 'title', 'twcount'],
    # Name the date column instead of using a position, which is ambiguous
    # when combined with usecols.
    parse_dates=['newsdate'],
)

# Drop headlines whose title matches "The Plazz" / "ThePlazz".
# Raw string: '\s' is a regex class, not a valid Python string escape.
# na=False: a missing title counts as "no match", so `~` never sees NaN.
df = df[~df.title.str.contains(r'The\s?Plazz', na=False)]

# Split at 2014-01-10 into the two periods that later plots label 2013 / 2014.
# NOTE(review): the cut is on 10 January, not 1 January -- presumably the
# collection started on 2013-01-10; confirm.
df13 = df[df['newsdate'] < '2014-01-10']
df14 = df[df['newsdate'] >= '2014-01-10']

# Most-tweeted headlines first. DataFrame.sort no longer exists in pandas;
# sort_values is its replacement.
df.sort_values('twcount', ascending=False)
Out[23]:
In [65]:
# group by week and by month
def _iso_week(ts):
    """Return the ISO week number of a timestamp."""
    return ts.isocalendar()[1]


def _month(ts):
    """Return the calendar month (1-12) of a timestamp."""
    return ts.month


def _tweets_by(frame, period_of):
    """Sum twcount over the rows of `frame`, grouped by period_of(newsdate).

    The grouping key is built from `frame`'s own dates (not from the full
    `df`), and only the twcount column is aggregated so the title text and
    the dates themselves never enter the sum.
    Returns a one-column DataFrame indexed by the period number.
    """
    period = frame['newsdate'].map(period_of)
    return frame.groupby(period)[['twcount']].sum()


# NOTE(review): week and month numbers carry no year, so rows from different
# calendar years that share a number are summed together. Confirm this is
# intended for the date ranges covered by df13 and df14.
gw13 = _tweets_by(df13, _iso_week)
gw14 = _tweets_by(df14, _iso_week)
gm13 = _tweets_by(df13, _month)
gm14 = _tweets_by(df14, _month)
In [76]:
# Overlay both weekly series on one axes to check whether the same seasonal
# shape shows up in each year.
fig_weekly, ax_weekly = plt.subplots()
for weekly_counts in (gw13, gw14):
    ax_weekly.plot(weekly_counts)
ax_weekly.legend(['2013', '2014'], loc=4)
ax_weekly.set_title('Weekly Commentation of US Newsmakers')
ax_weekly.set_ylabel('# of tweets')
ax_weekly.set_xlabel('week of the year');
In [77]:
# Total tweet count per day. Only twcount is summed, so `daily` always has
# exactly one numeric column; summing every column could also aggregate the
# title text, which would then collide with `title` when `daily` is merged
# back onto headline rows.
daily = df.groupby('newsdate')[['twcount']].sum()
daily.plot()
Out[77]:
In [8]:
N_TOP_DAYS = 11  # number of busiest days whose top headline is kept

# Plotly trace of the daily tweet totals.
data = Data([Scatter(x=daily.index, y=daily.twcount)])

# For every day keep the headline(s) whose twcount equals that day's maximum
# (ties are kept). transform('max') puts the per-day maximum on each row, so
# the filter is a plain boolean mask and newsdate stays an ordinary column,
# with no dependence on how groupby.apply treats the grouping column.
day_max = df.groupby('newsdate')['twcount'].transform('max')
twc = df[df['twcount'] == day_max]

# The N_TOP_DAYS days with the highest total tweet count.
# DataFrame.sort no longer exists in pandas; sort_values is its replacement.
top_days = daily.sort_values('twcount', ascending=False).head(N_TOP_DAYS).index

titles = (
    twc[twc['newsdate'].isin(top_days)]
    .sort_values('twcount', ascending=False)
    .reset_index(drop=True)
)

# Attach each day's total as 'twdaily'. Renaming before the merge removes the
# need for an in-place rename of the auto-generated twcount_x / twcount_y.
daily_totals = daily[['twcount']].rename(columns={'twcount': 'twdaily'})
titles = pd.merge(titles, daily_totals, left_on='newsdate', right_index=True)
titles
Out[8]:
In [14]:
# Dates whose label is placed 10 units under the day's total instead of 30
# above it (presumably to keep neighbouring labels from overlapping -- TODO confirm).
_LOWERED_LABEL_DAYS = tuple(
    pd.to_datetime(day) for day in ('2013-10-01', '2014-12-09', '2014-12-17')
)


def annotator(r):
    """Build the plot annotation that shows one headline's title.

    r -- mapping with keys 'newsdate', 'twdaily' and 'title'.

    Returns an Annotation anchored at x = r['newsdate'] in data coordinates,
    without an arrow, at y = r['twdaily'] - 10 for the three listed dates
    and y = r['twdaily'] + 30 for every other date.
    """
    lowered = r['newsdate'] in _LOWERED_LABEL_DAYS
    y = r['twdaily'] - 10 if lowered else r['twdaily'] + 30
    return Annotation(
        x=r['newsdate'],
        y=y,
        xref='x',
        yref='y',
        text=r['title'],
        showarrow=False,
    )
In [17]:
# One annotation per row of `titles`, each built by annotator().
title_records = titles.to_dict(orient='records')
annotations = Annotations([annotator(record) for record in title_records])

# Daily-count trace plus the headline labels, with a titled y axis.
y_axis = YAxis(title='Daily tweet counts')
layout = Layout(
    title="News Commentary Tweet Counts of the U.S. Newsmakers (Jan 2013 - Jan 2015)",
    annotations=annotations,
    yaxis=y_axis,
)
fig = Figure(data=data, layout=layout)
In [18]:
# Send the figure to plotly under this name and display it in the notebook.
plot_name = "News Commentary Tweet Counts of the U.S. Newsmakers"
py.iplot(fig, filename=plot_name)
Out[18]:
In [20]:
# these tweets belong to...
# NOTE: this rebinds `df`, which held the headlines frame in the cells above;
# re-running those cells after this one will see the tweet-level data instead.
df = pd.read_csv('data/US-tweeps.csv', encoding='utf-8')

# The 100 handles with the most non-null twtext rows, largest count first.
# Series.order no longer exists in pandas; sort_values is its replacement.
tweeps = (
    df.groupby(by='twhandle')['twtext']
    .count()
    .sort_values(ascending=False)
    .head(100)
)
tweeps.plot()
Out[20]:
In [21]:
# The same per-handle counts as an interactive plotly line:
# handles on x, tweet counts on y.
tweeps_trace = Scatter(x=tweeps.index.values, y=tweeps.values)
data = Data([tweeps_trace])
fig = Figure(data=data)
py.iplot(fig)
Out[21]: